******************************************************
**   How accurately are we measuring the LGBT  		**
** 				population in Colombia? 			** 
**			Evidence from a list experiment         **
**												    **
** 		Andrés Ham, Ángela Guarin & Juanita Ruiz    **
** 			                                        **
** 			   						                **
**  Escuela de Gobierno - Alberto Lleras Camargo    **
****************************************************** 
*** THIS DOFILE REPLICATES TABLES AND FIGURES IN THE APPENDIX

*** THIS VERSION: 07/18/2023 ***


* Options
clear all
set more off
set scheme s1mono
cap log close

* Paths
*   $dir : replication folder; the .dta files loaded below are read from it
*   $out : folder that receives the exported tables and figures
* The two branches below are machine-specific shortcuts for the authors.
if c(username)=="ham_andres" {
	cd "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files"
	global dir "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files"
	global out "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files/4. Figures"
	}
if c(username)=="j.ruiz" {
	cd "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files"
	global dir "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files"
	global out "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files\4. Figures"
	}

* Fallback for any other user. Globals survive -clear all-, so values set by
* a calling do-file are respected; otherwise the current working directory is
* taken as the replication folder. Without this, $dir and $out would be empty
* and every -use- / -export- below would point at a path starting with "/".
if "$dir"=="" {
	global dir "`c(pwd)'"
}
if "$out"=="" {
	global out "$dir/4. Figures"
}

	
*----------------------------------------------------------------------------------*
*	Table A.1. Proposed and implemented efforts to measure the LGBT population in Colombia
*----------------------------------------------------------------------------------*		
	
* In Excel file.	
	

*----------------------------------------------------------------------------------------*
*	Table A.2 Profile of self-reported LGBT and non-LGBT population in household surveys *  
*       								Full Sample 									 *
*----------------------------------------------------------------------------------------*		

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

** Pre-processing

* Age bands (ages below 18 and missing ages stay missing)
gen rango_edad = 1 if inrange(edad, 18, 29)
replace rango_edad = 2 if inrange(edad, 30, 50)
replace rango_edad = 3 if edad>=51 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-29" 2 "30-50" 3 ">51"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)

* Education and health-regime dummies (needed for the second outcome list)
tab educ, g(nedu_)
tab regimen_salud, g(reg_sal_)

** PROFILE TABLE
* perfiles_1 layout (31 x 7):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-20 demographics, rows 23-30 education & health, row 31 counts
mat perfiles_1 = J(31,7,.)

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "female space space age_1 age_2 age_3 space space civil_1 civil_2 civil_3 civil_4 space space estr_1 estr_2 estr_3 estr_4 estr_5 estr_6"
local vars_b "nedu_1 nedu_2 nedu_3 nedu_4 space estudia space salud"
local start_a = 1
local start_b = 23

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c''
			mat perfiles_1[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight], r
		test lgbt_numerica
		mat perfiles_1[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight], r
		test lg=bisex=trans_numerica
		mat perfiles_1[`row',7] = r(p)

		local ++row
	}
}

* Last row: unweighted number of observations in each group
foreach c of numlist 1 2 4/6 {
	count if `grp`c''
	mat perfiles_1[31,`c'] = r(N)
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_1
export excel using "$out/TableA2.xlsx", replace
restore
	

*----------------------------------------------------------------------------------------*
*	Table A.3 Profile of self-reported LGBT and non-LGBT population in household surveys *  
*       								Born Female 									 *
*----------------------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

** Pre-processing

* Age bands (ages below 18 and missing ages stay missing)
gen rango_edad = 1 if inrange(edad, 18, 29)
replace rango_edad = 2 if inrange(edad, 30, 50)
replace rango_edad = 3 if edad>=51 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-29" 2 "30-50" 3 ">51"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)

* Education and health-regime dummies (needed for the second outcome list)
tab educ, g(nedu_)
tab regimen_salud, g(reg_sal_)

** PROFILE TABLE (respondents with female==1, i.e. sexo==2)
* perfiles_2 layout (28 x 7):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-17 demographics, rows 20-27 education & health, row 28 counts
mat perfiles_2 = J(28,7,.)

* Sample restriction applied to every statistic in this table
local smp "female==1"

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "age_1 age_2 age_3 space space civil_1 civil_2 civil_3 civil_4 space space estr_1 estr_2 estr_3 estr_4 estr_5 estr_6"
local vars_b "nedu_1 nedu_2 nedu_3 nedu_4 space estudia space salud"
local start_a = 1
local start_b = 20

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c'' & `smp'
			mat perfiles_2[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight] if `smp', r
		test lgbt_numerica
		mat perfiles_2[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight] if `smp', r
		test lg=bisex=trans_numerica
		mat perfiles_2[`row',7] = r(p)

		local ++row
	}
}

* Last row: unweighted number of observations in each group
foreach c of numlist 1 2 4/6 {
	count if `grp`c'' & `smp'
	mat perfiles_2[28,`c'] = r(N)
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_2
export excel using "$out/TableA3.xlsx", replace
restore
	
	
*----------------------------------------------------------------------------------------*
*	Table A.4 Profile of self-reported LGBT and non-LGBT population in household surveys *  
*       								Born Male 									 	 *
*----------------------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

** Pre-processing

* Age bands (ages below 18 and missing ages stay missing)
gen rango_edad = 1 if inrange(edad, 18, 29)
replace rango_edad = 2 if inrange(edad, 30, 50)
replace rango_edad = 3 if edad>=51 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-29" 2 "30-50" 3 ">51"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)

* Education and health-regime dummies (needed for the second outcome list)
tab educ, g(nedu_)
tab regimen_salud, g(reg_sal_)

** PROFILE TABLE (respondents with female==0, i.e. sexo!=2)
* perfiles_3 layout (28 x 7):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-17 demographics, rows 20-27 education & health, row 28 counts
mat perfiles_3 = J(28,7,.)

* Sample restriction applied to every statistic in this table
local smp "female==0"

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "age_1 age_2 age_3 space space civil_1 civil_2 civil_3 civil_4 space space estr_1 estr_2 estr_3 estr_4 estr_5 estr_6"
local vars_b "nedu_1 nedu_2 nedu_3 nedu_4 space estudia space salud"
local start_a = 1
local start_b = 20

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c'' & `smp'
			mat perfiles_3[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight] if `smp', r
		test lgbt_numerica
		mat perfiles_3[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight] if `smp', r
		test lg=bisex=trans_numerica
		mat perfiles_3[`row',7] = r(p)

		local ++row
	}
}

* Last row: unweighted number of observations in each group
foreach c of numlist 1 2 4/6 {
	count if `grp`c'' & `smp'
	mat perfiles_3[28,`c'] = r(N)
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_3
export excel using "$out/TableA4.xlsx", replace
restore
	

*----------------------------------------------------------------------------*
*Table A.5. Labor market outcomes for non-LGBT and LGBT population in Bogotá *
*       						Full Sample 							     *
*----------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

* Pre-processing
* (the age_*, civil_* and estr_* dummies are built for parity with the
*  demographic tables; the outcome lists of this section do not use them)

* Age bands. The open-ended band uses edad<. so that extended missing values
* (.a-.z) are not classified as "56 or more".
gen rango_edad = 1 if inrange(edad, 18, 25)
replace rango_edad = 2 if inrange(edad, 26, 40)
replace rango_edad = 3 if inrange(edad, 41, 55)
replace rango_edad = 4 if edad>=56 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-25" 2 "26-40" 3 "41-55" 4 "56 or more"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Job-perception items: convert to numeric and recode 2 to 0
foreach v of varlist gusta_empleo cambio_empleo problemas_empleo {
	destring `v', replace
	replace `v' = 0 if `v'==2
}

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)


***** PROFILES: LABOR *****

tab cat_empleo, g(cate_)

* perfiles_4 layout (21 x 7):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-17 labor outcomes, rows 19-21 job-perception items
mat perfiles_4 = J(21,7,.)

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "pea space ocupado des_aux space horas_t space space cate_1 cate_2 cate_3 cate_4 space contrato cotiza_pension space inglabo"
local vars_b "gusta_empleo cambio_empleo problemas_empleo"
local start_a = 1
local start_b = 19

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c''
			mat perfiles_4[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight], r
		test lgbt_numerica
		mat perfiles_4[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight], r
		test lg=bisex=trans_numerica
		mat perfiles_4[`row',7] = r(p)

		local ++row
	}
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_4
export excel using "$out/TableA5.xlsx", replace
restore


*----------------------------------------------------------------------------*
*Table A.6. Labor market outcomes for non-LGBT and LGBT population in Bogotá *
*       						Born Female 							     *
*----------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

* Pre-processing
* (the age_*, civil_* and estr_* dummies are built for parity with the
*  demographic tables; the outcome lists of this section do not use them)

* Age bands. The open-ended band uses edad<. so that extended missing values
* (.a-.z) are not classified as "56 or more".
gen rango_edad = 1 if inrange(edad, 18, 25)
replace rango_edad = 2 if inrange(edad, 26, 40)
replace rango_edad = 3 if inrange(edad, 41, 55)
replace rango_edad = 4 if edad>=56 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-25" 2 "26-40" 3 "41-55" 4 "56 or more"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Job-perception items: convert to numeric and recode 2 to 0
foreach v of varlist gusta_empleo cambio_empleo problemas_empleo {
	destring `v', replace
	replace `v' = 0 if `v'==2
}

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)


***** PROFILES: LABOR *****

tab cat_empleo, g(cate_)

* perfiles_5 layout (21 x 7), respondents with female==1 (sexo==2):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-17 labor outcomes, rows 19-21 job-perception items
mat perfiles_5 = J(21,7,.)

* Sample restriction applied to every statistic in this table
local smp "female==1"

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "pea space ocupado des_aux space horas_t space space cate_1 cate_2 cate_3 cate_4 space contrato cotiza_pension space inglabo"
local vars_b "gusta_empleo cambio_empleo problemas_empleo"
local start_a = 1
local start_b = 19

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c'' & `smp'
			mat perfiles_5[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight] if `smp', r
		test lgbt_numerica
		mat perfiles_5[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight] if `smp', r
		test lg=bisex=trans_numerica
		mat perfiles_5[`row',7] = r(p)

		local ++row
	}
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_5
export excel using "$out/TableA6.xlsx", replace
restore


*----------------------------------------------------------------------------*
*Table A.7. Labor market outcomes for non-LGBT and LGBT population in Bogotá *
*       						Born Male 							     *
*----------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera is divided by 12 for 2021 records and by 5 for 2022
* records (the denominator is the number of months included in each year).
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)

* One id per year-month cell; used as i.time in the regressions below
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* All-zero filler variable: every "space" entry in an outcome list occupies
* one row of the table, which separates the groups of outcomes.
gen space = 0

* Pre-processing
* (the age_*, civil_* and estr_* dummies are built for parity with the
*  demographic tables; the outcome lists of this section do not use them)

* Age bands. The open-ended band uses edad<. so that extended missing values
* (.a-.z) are not classified as "56 or more".
gen rango_edad = 1 if inrange(edad, 18, 25)
replace rango_edad = 2 if inrange(edad, 26, 40)
replace rango_edad = 3 if inrange(edad, 41, 55)
replace rango_edad = 4 if edad>=56 & edad<.
* Full variable name spelled out so the line does not depend on -varabbrev-
label var rango_edad "Rango de edad"
label define rango 1 "18-25" 2 "26-40" 3 "41-55" 4 "56 or more"
label values rango_edad rango
tab rango_edad, g(age_)

gen female = (sexo==2)

* Marital status collapsed to four groups; other codes stay missing
gen est_civil = .
replace est_civil = 1 if inlist(estado_civil, 1, 2)
replace est_civil = 2 if estado_civil==3
replace est_civil = 3 if estado_civil==6
replace est_civil = 4 if inlist(estado_civil, 4, 5)
tab est_civil, g(civil_)

* Stratum: codes 0 and 9 are treated as missing
destring estrato, replace
replace estrato = . if inlist(estrato, 0, 9)
tab estrato, g(estr_)

* Job-perception items: convert to numeric and recode 2 to 0
foreach v of varlist gusta_empleo cambio_empleo problemas_empleo {
	destring `v', replace
	replace `v' = 0 if `v'==2
}

* Gay / Lesbian / LG / Bisexual indicators.
* NOTE(review): these depend on ident_sex and atraccion only. An additional
* alternative "| sexo==1 & ident_sex==1 & atraccion==1" (resp. ==2) is a
* subset of the condition used here and therefore changes nothing; confirm
* that sexo was not meant to stand in for ident_sex in one of the clauses.
gen gay   = (ident_sex==1 & atraccion==1)
gen lesb  = (ident_sex==2 & atraccion==2)
gen lg    = (gay==1 | lesb==1)
gen bisex = (atraccion==3)


***** PROFILES: LABOR *****

tab cat_empleo, g(cate_)

* perfiles_6 layout (21 x 7), respondents with female==0 (sexo!=2):
*   col 1 mean, non-LGBT        col 2 mean, LGBT
*   col 3 p-value of lgbt_numerica in: outcome on lgbt_numerica + i.time
*   col 4 mean, LG              col 5 mean, bisexual      col 6 mean, trans
*   col 7 p-value of the test lg = bisex = trans_numerica
*   rows 1-17 labor outcomes, rows 19-21 job-perception items
mat perfiles_6 = J(21,7,.)

* Sample restriction applied to every statistic in this table
local smp "female==0"

* Sub-sample behind each column of means
local grp1 "lgbt_numerica==0"
local grp2 "lgbt_numerica==1"
local grp4 "lg==1"
local grp5 "bisex==1"
local grp6 "trans_numerica==1"

* Outcome lists and the row where each one starts
local vars_a "pea space ocupado des_aux space horas_t space space cate_1 cate_2 cate_3 cate_4 space contrato cotiza_pension space inglabo"
local vars_b "gusta_empleo cambio_empleo problemas_empleo"
local start_a = 1
local start_b = 19

foreach part in a b {
	local row = `start_`part''
	foreach x of local vars_`part' {

		* Weighted means by group
		foreach c of numlist 1 2 4/6 {
			summ `x' [aw=weight] if `grp`c'' & `smp'
			mat perfiles_6[`row',`c'] = r(mean)
		}

		* Difference: LGBT vs non-LGBT (robust SEs)
		reg `x' lgbt_numerica i.time [aw=weight] if `smp', r
		test lgbt_numerica
		mat perfiles_6[`row',3] = r(p)

		* Difference across LG, B and T (robust SEs)
		reg `x' lg bisex trans_numerica i.time [aw=weight] if `smp', r
		test lg=bisex=trans_numerica
		mat perfiles_6[`row',7] = r(p)

		local ++row
	}
}

* Export the matrix; the survey data are restored afterwards
preserve
drop _all
svmat double perfiles_6
export excel using "$out/TableA7.xlsx", replace
restore


*---------------------------------------------------------------------------*
* Tables A.8, A.9 and A.10. Oaxaca-Blinder decompositions for labor market
* outcomes in Bogotá: A.8 full sample, A.9 female at birth, A.10 male at birth
*---------------------------------------------------------------------------*

* Load the GEIH survey file
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: pondera/12 for 2021 records, pondera/5 for 2022 records
* (the denominator is the number of months included in each year)
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5
gen weight = cond(year==2022, weight_2022, weight_2021)
egen time = group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* Other variables: squared age and log labor income
gen edad2 = edad^2
gen log_earnings = ln(inglabo)

* OAXACA BLINDER: indicator sets used as controls
* NOTE(review): unlike the profile tables, estrato is tabulated here without
* destring and without setting codes 0/9 to missing, so estr_1 is simply the
* lowest category present in the data - confirm this is intended.
* NOTE(review): dpto_* is created but is not part of the control list.
tab estado_civil, g(est_)
tab educ, g(nedu_)
tab estrato, g(estr_)
tab time, g(my_)
tab dpto, g(dpto_)

* Outcomes and controls (my_2-my_17 requires at least 17 year-month cells)
local depvars "pea ocupado des_aux informal log_earnings"
local rhs "edad edad2 est_2 est_3 est_4 est_5 est_6 nedu_2 nedu_3 nedu_4 estr_2 estr_3 estr_4 estr_5 estr_6 my_2-my_17 per_hog"

* Sample restriction for each set of estimates
local if_all ""
local if_male "if sexo==1"
local if_female "if sexo==2"

* For every outcome and sample, store two decompositions by lgbt_numerica:
*   <outcome>_<sample>1 : default oaxaca specification
*   <outcome>_<sample>2 : same call with the pooled option
foreach y of local depvars {
	foreach s in all male female {

		oaxaca `y' `rhs' [aw=weight] `if_`s'', by(lgbt_numerica) vce(robust) relax
		estimates store `y'_`s'1

		oaxaca `y' `rhs' [aw=weight] `if_`s'', by(lgbt_numerica) vce(robust) pooled relax
		estimates store `y'_`s'2
	}
}

* Output file for each sample
local file_all "TableA8"
local file_female "TableA9"
local file_male "TableA10"

* Each CSV holds the "1" estimates first and the pooled "2" estimates appended.
* NOTE(review): r2_a is requested in stats(); check that oaxaca stores it,
* otherwise that row is exported empty.
foreach s in all female male {

	* Stored-estimate names for this sample, in the column order of mtitles()
	local set1 ""
	local set2 ""
	foreach y of local depvars {
		local set1 "`set1' `y'_`s'1"
		local set2 "`set2' `y'_`s'2"
	}

	esttab `set1'																		///
		using "$out/`file_`s''.csv", csv replace   										///
		b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) 										///
		keep(group_1 group_2 difference endowments coefficients interaction) 			///
		nonotes nolines compress staraux 												///
		title("`s'") mtitles("LFP" "ER" "UR" "Informality" "Log Earnings")				///
		stats(r2_a N, label("Adjusted R2" "N") fmt(3 0))

	esttab `set2'																		///
		using "$out/`file_`s''.csv", csv append   										///
		b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) 										///
		keep(group_1 group_2 difference explained unexplained) 				 			///
		nonotes nolines compress staraux 												///
		title("`s'") mtitles("LFP" "ER" "UR" "Informality" "Log Earnings")				///
		stats(r2_a N, label("Adjusted R2" "N") fmt(3 0))

}


*----------------------------------------------------------------------------------*
*	Table A.11. Example of list experiment question for attraction to the same sex
*----------------------------------------------------------------------------------*		
	
* In Excel file.

*----------------------------------------------------------------------------------*
*	Table A.12. Example of list experiment question for attraction to both sexes
*----------------------------------------------------------------------------------*		
	
* In Excel file.


*---------------------------------------------------------------------------*
* Table A.13. Prevalence of Ceiling and floor effects
*---------------------------------------------------------------------------*

* Load Data	
* Reads the list-experiment database and builds, for each of the four lists,
* the share of respondents reporting the lowest and the highest item count
* (floor and ceiling effects), separately for grupo==0 and grupo==1.
use "$dir/2.List Experiment database.dta", clear

* 1. Build d_qi for each direct question
*Gender identity
* d_1 = 1 when sex at birth differs from a non-missing reported identity
gen d_1=0
replace d_1=1 if sexo_nace!=identidad & identidad!=.
*Same sex attraction
* d_2 = 1 when orientacion equals 1 and sexo_nace is 1 or identidad is 1 or 3,
* or when orientacion equals 2 and sexo_nace is 2 or identidad is 2 or 4
gen d_2=0
replace d_2=1 if sexo_nace==1 & orientacion==1 | identidad==1 & orientacion==1 | identidad==3 & orientacion==1 | sexo_nace==2 & orientacion==2 | identidad==2 & orientacion==2 | identidad==4 & orientacion==2 
	//Should we just take into account identity and not sex?
	//Sexual orientation is supposed to be associated with gender
*Both sexes
gen d_3=0
replace d_3=1 if orientacion==3
*Compliance
* The compliance list (list 4) reuses the same-sex-attraction indicator
gen d_4=d_2


* 2. Build y^d_qi= d_qi + c_qi
* Lists 1-3: item count from the *_dir variable plus the direct answer
local lists "1 2 3"
foreach i of local lists {
	gen ld_`i'= l`i'_dir + d_`i'
}

* List 4: the direct answer is added to l4_ind instead of l4_dir
gen ld_4= l4_ind + d_4

* 3. Build y_qi= y^d_qi + y^v_qi
* Lists 1-3: grupo==0 takes ld_i, grupo==1 takes l<i>_ind
foreach i of local lists {
	gen l`i'=.
	replace l`i'= ld_`i' if grupo==0
	replace l`i'= l`i'_ind if grupo==1
}

* List 4 is assigned the other way round: grupo==1 takes ld_4, grupo==0 l4_dir
gen l4=.
replace l4= ld_4 if grupo==1
replace l4= l4_dir if grupo==0


** 1. Report Ceiling and Floor Effects **

* A. Without Expansion Factors
* Matrix
* table0 (grupo==0) and table1 (grupo==1): one row per list,
* column 1 = floor share, column 2 = ceiling share
mat table0=J(4,2,.)
mat table1=J(4,2,.)

loc outcomes "l1 l2 l3 l4"  
loc grupo "0 1" 
foreach z of local grupo {
loc i=1
foreach var of local outcomes {

*Percentage of the # of valid statements reported	
* estpost is a user-written command (estout package); e(pct) is read below
* as a row vector with one percentage per tabulated value
estpost tabulate `var' if grupo==`z'

mat list e(pct)
mat a`z'_`var' = e(pct)

*Store Minimum Value
* First column = lowest value observed in this group
scalar a`z'`i'=a`z'_`var'[1,1]

*Store Maximum Value
* Hard-coded positions: column 5 for grupo==0, column 6 for grupo==1.
* NOTE(review): this presumes that every count from 0 upward is observed, so
* that column k holds the value k-1 - confirm against the tabulations.
if `z'==0  {
scalar b`z'`i'=a`z'_`var'[1,5]
}
if `z'==1 {
scalar b`z'`i'=a`z'_`var'[1,6]
}
scalar list a`z'`i'
scalar list b`z'`i'

mat table`z'[`i',1]=a`z'`i'
mat table`z'[`i',2]=b`z'`i'

matlist table`z'

loc i=`i'+1

	}
}

matlist table0
matlist table1

*Maximum Values for list #4
* List 4 swaps the two groups (see step 3), so the ceiling columns written by
* the loop are replaced here: column 6 for grupo==0, column 5 for grupo==1.
* The scalars a04 and a14 created in the loop (floor shares of list 4) are
* overwritten below; their values were already copied into table0/table1.
//Direct
	//Percentage of the # of valid statements reported	
	estpost tabulate l4 if grupo==0
	mat list e(pct)
	mat a0= e(pct)
	//Fix maximum value
	scalar a04=a0[1,6]
	scalar list a04
	mat table0[4,2]=a04
//Veiled
	//Percentage of the # of valid statements reported	
	estpost tabulate l4 if grupo==1
	mat list e(pct)
	mat a1= e(pct)
	//Fix maximum value
	scalar a14=a1[1,5]
	scalar list a14
	mat table1[4,2]=a14
	
matlist table0
matlist table1

* Export: svmat creates table01 table02 table11 table12; a text column names
* each list. The sheet is written from cell A3 with the modify option.
preserve 
	drop _all
	svmat table0 
	svmat table1
	
	gen list=""
	replace list="List 1" if _n==1
	replace list="List 2" if _n==2
	replace list="List 3" if _n==3
	replace list="List 4 (Compliance)" if _n==4

order list
export excel using "$out/TableA13.xlsx", cell(A3) firstrow(varlab) sheet("Percentage CF Effects", modify) keepcellfmt
restore 

*---------------------------------------------------------------------------*
* Figure A.1. Composition of LGBT population by sex at birth
*---------------------------------------------------------------------------*

* Load the data
use "$dir/1.GEIH_LGBT.dta", clear

* Update the weights
gen weight_2021 = pondera/12
gen weight_2022 = pondera/5 // the denominator grows as months are added, because annual statistics are being computed
gen weight = cond(year==2022, weight_2022, weight_2021)
egen time = group(year mes)

* Result matrices. Only population_type_sex is filled in this section;
* population and population_type are created but not referenced below.
mat population = J(3,1,.)
mat population_type = J(3,4,.)
mat population_type_sex = J(3,8,.)

* Region: 1 = dpto 11, 2 = clase 1 outside dpto 11, 3 = clase 2 outside dpto 11
gen reg1 = 1 if dpto==11
replace reg1 = 2 if clase==1 & dpto~=11
replace reg1 = 3 if clase==2 & dpto~=11

* Gay, lesbian and bisexual indicators. The extra alternative involving sexo
* is a subset of the first clause; it is kept exactly as written.
gen gay = (ident_sex==1 & atraccion==1 | sexo==1 & ident_sex==1 & atraccion==1)
gen lesb = (ident_sex==2 & atraccion==2 | sexo==2 & ident_sex==2 & atraccion==2)
gen bisex = (atraccion==3)

* Weighted share of each group by region and sex at birth.
* Columns 1-4: sexo==1 (born male); columns 5-8: sexo==2 (born female);
* within each half the order is lesbian, gay, bisexual, trans.
local col = 0
foreach v in lesb gay bisex trans_numerica {

	local ++col

	* Respondents with missing lgbt_numerica are excluded from the shares
	replace `v' = . if lgbt_numerica==.

	forvalues r = 1/3 {
		summ `v' [aw=weight] if reg1==`r' & sexo==1
		mat population_type_sex[`r',`col'] = r(mean)

		summ `v' [aw=weight] if reg1==`r' & sexo==2
		mat population_type_sex[`r',`col'+4] = r(mean)
	}
}

preserve
drop _all
svmat double population_type_sex
gen region = _n

* Name the eight columns <group>_<sex> and express the shares in percent
local k = 0
foreach sx in male female {
	foreach g in l g b t {
		local ++k
		rename population_type_sex`k' `g'_`sx'
		replace `g'_`sx' = `g'_`sx'*100
	}
}

label define regions 1 "Bogotá" 2 "Other urban areas" 3 "Rural areas"
label values region regions

* One bar chart per sex at birth, stored as bornmale / bornfemale
foreach sx in male female {
	graph bar l_`sx' g_`sx' b_`sx' t_`sx', over(region, label(labsize(small))) 				///
		asyvars bar(1, color(lavender)) bar(2, color(ebblue)) 									///
		bar(3, color(mint)) bar(4, color(cranberry)) bargap(10) 								///
		blabel(bar, format(%4.2fc)) 															///
		ytitle("Percentage") ylabel(0(0.5)2.5, format(%4.1fc) nogrid) 							///
		legend(rows(1) label(1 "Lesbian") label(2 "Gay") label(3 "Bisexual") label(4 "Trans") 	///
		region(lstyle(none)) size(small)) name(born`sx', replace) subtitle("Born `sx'")
}

* Combine both panels with a single legend (grc1leg is user-written) and export
grc1leg bornmale bornfemale, rows(1) cols(2) ycommon iscale(*0.8) legendfrom(bornmale) span
graph export "$out/FigureA1.pdf", replace

restore


